# Load the two input tables. read.csv already defaults to a header row and a
# comma separator, so only the file paths need to be given.
labeled <- read.csv("data/train.csv")
unlabeled <- read.csv("data/test.csv")
# Histogram grid for the labeled data: a 3 x 4 panel layout with one histogram
# per column in positions 2 through 13, titled and x-labelled with the column
# name.
par(mfrow = c(3, 4))
for (i in seq(2, 13)) {
  hist(
    labeled[[i]],
    main = names(labeled)[i],
    xlab = names(labeled)[i]
  )
}

# Histogram grid for the unlabeled data: same 3 x 4 layout, but only columns
# 2 through 12 are drawn, leaving the last panel empty.
# NOTE(review): presumably the unlabeled file has one column fewer than the
# labeled one -- confirm against the data.
par(mfrow = c(3, 4))
for (i in seq(2, 12)) {
  hist(
    unlabeled[[i]],
    main = names(unlabeled)[i],
    xlab = names(unlabeled)[i]
  )
}

# Pairwise scatter-plot matrix for columns 2 through 13 of the labeled data.
# pch must be an integer symbol code or a single character; the fractional
# value 0.1 used previously was truncated to code 0 (open square), so that
# code is now written explicitly. cex = 0.05 draws the symbols at 5% of the
# default size.
pairs(
  labeled[, 2:13],
  main = "Scatter plot of each variable in labeled dataset",
  pch = 0,
  cex = 0.05
)

library(corrplot)

# Correlation heatmap for columns 2 through 13 of the labeled data, drawn on a
# single panel (resets the multi-panel layout set earlier).
par(mfrow = c(1, 1))
cor_mat <- cor(labeled[, 2:13])

# corrplot spreads `col` evenly over [-1, 1] for a correlation matrix, so a
# diverging ramp with white near zero is used. A white-to-red ramp would paint
# strong negative correlations white and zero correlation pink.
# Only the upper triangle is shown, variables are ordered by hierarchical
# clustering, and each cell is annotated with its coefficient.
corrplot(
  cor_mat,
  method = "color",
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  col = colorRampPalette(c("blue", "white", "red"))(100)
)

# Boxplot grid for the labeled data: a 3 x 4 panel layout with one boxplot per
# column in positions 2 through 13, titled and x-labelled with the column name.
par(mfrow = c(3, 4))
for (i in seq(2, 13)) {
  boxplot(
    labeled[[i]],
    main = names(labeled)[i],
    xlab = names(labeled)[i]
  )
}
